In [71]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
In [2]:
df = pd.read_csv(r"C:\Users\User\OneDrive\Documents\4. Github Project & Dataset\python project\sorted_data.csv")
df.head()
Out[2]:
| loan_paid_back | education_level | monthly_income | employment_status | debt_to_income_ratio | credit_score | loan_amount | interest_rate | loan_term | installment | num_of_open_accounts | total_credit_limit | current_balance | delinquency_history | public_records | num_of_delinquencies | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Master's | 2020.02 | Employed | 0.07 | 743 | 17173.72 | 13.39 | 36 | 581.88 | 7 | 40833.47 | 24302.07 | 1 | 0 | 1 |
| 1 | 1 | Bachelor's | 1681.08 | Employed | 0.22 | 531 | 22663.89 | 17.81 | 60 | 573.17 | 5 | 27968.01 | 10803.01 | 1 | 0 | 3 |
| 2 | 1 | High School | 2181.82 | Employed | 0.23 | 779 | 3631.36 | 9.53 | 60 | 76.32 | 2 | 15502.25 | 4505.44 | 0 | 0 | 0 |
| 3 | 1 | High School | 989.49 | Employed | 0.26 | 809 | 14939.23 | 7.99 | 36 | 468.07 | 7 | 18157.79 | 5525.63 | 4 | 0 | 5 |
| 4 | 1 | Other | 2110.54 | Employed | 0.26 | 663 | 16551.71 | 15.20 | 60 | 395.50 | 1 | 17467.56 | 3593.91 | 2 | 0 | 2 |
In [3]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 20000 entries, 0 to 19999 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 loan_paid_back 20000 non-null int64 1 education_level 20000 non-null object 2 monthly_income 20000 non-null float64 3 employment_status 20000 non-null object 4 debt_to_income_ratio 20000 non-null float64 5 credit_score 20000 non-null int64 6 loan_amount 20000 non-null float64 7 interest_rate 20000 non-null float64 8 loan_term 20000 non-null int64 9 installment 20000 non-null float64 10 num_of_open_accounts 20000 non-null int64 11 total_credit_limit 20000 non-null float64 12 current_balance 20000 non-null float64 13 delinquency_history 20000 non-null int64 14 public_records 20000 non-null int64 15 num_of_delinquencies 20000 non-null int64 dtypes: float64(7), int64(7), object(2) memory usage: 2.4+ MB
In [4]:
df.value_counts("education_level")
Out[4]:
education_level Bachelor's 8045 High School 5919 Master's 3724 Other 1508 PhD 804 Name: count, dtype: int64
In [5]:
df.value_counts("employment_status")
Out[5]:
employment_status Employed 13007 Self-employed 2923 Unemployed 2113 Retired 1176 Student 781 Name: count, dtype: int64
In [6]:
df.describe()
Out[6]:
| loan_paid_back | monthly_income | debt_to_income_ratio | credit_score | loan_amount | interest_rate | loan_term | installment | num_of_open_accounts | total_credit_limit | current_balance | delinquency_history | public_records | num_of_delinquencies | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 20000.000000 | 20000.000000 | 20000.000000 | 20000.00000 | 20000.000000 | 20000.000000 | 20000.00000 | 20000.000000 | 20000.000000 | 20000.000000 | 20000.000000 | 20000.000000 | 20000.000000 | 20000.000000 |
| mean | 0.799900 | 3629.136466 | 0.177496 | 679.25695 | 15129.300909 | 12.400627 | 43.22280 | 455.625794 | 5.011800 | 48649.824769 | 24333.394631 | 1.990150 | 0.061800 | 2.489150 |
| std | 0.400085 | 2389.048326 | 0.105108 | 69.63858 | 8605.405513 | 2.442729 | 11.00838 | 274.622125 | 2.244529 | 32423.378128 | 22313.845395 | 1.474945 | 0.285105 | 1.631384 |
| min | 0.000000 | 500.000000 | 0.010000 | 373.00000 | 500.000000 | 3.140000 | 36.00000 | 9.430000 | 0.000000 | 6157.800000 | 496.350000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 1.000000 | 2021.730000 | 0.100000 | 632.00000 | 8852.695000 | 10.740000 | 36.00000 | 253.910000 | 3.000000 | 27180.492500 | 9592.572500 | 1.000000 | 0.000000 | 1.000000 |
| 50% | 1.000000 | 3048.770000 | 0.160000 | 680.00000 | 14946.170000 | 12.400000 | 36.00000 | 435.595000 | 5.000000 | 40241.615000 | 18334.555000 | 2.000000 | 0.000000 | 2.000000 |
| 75% | 1.000000 | 4556.495000 | 0.240000 | 727.00000 | 20998.867500 | 14.002500 | 60.00000 | 633.595000 | 6.000000 | 60361.257500 | 31743.327500 | 3.000000 | 0.000000 | 3.000000 |
| max | 1.000000 | 33333.330000 | 0.670000 | 850.00000 | 49039.690000 | 22.510000 | 60.00000 | 1685.400000 | 15.000000 | 454394.190000 | 352177.900000 | 11.000000 | 2.000000 | 11.000000 |
In [7]:
print(f"Minimum loan Amount: {min(df["loan_amount"])}")
Minimum loan Amount: 500.0
In [8]:
print(f"Maximum loan Amount: {max(df["loan_amount"])}")
Maximum loan Amount: 49039.69
In [9]:
print(df.isna().sum())
loan_paid_back 0 education_level 0 monthly_income 0 employment_status 0 debt_to_income_ratio 0 credit_score 0 loan_amount 0 interest_rate 0 loan_term 0 installment 0 num_of_open_accounts 0 total_credit_limit 0 current_balance 0 delinquency_history 0 public_records 0 num_of_delinquencies 0 dtype: int64
In [10]:
df.drop_duplicates()
Out[10]:
| loan_paid_back | education_level | monthly_income | employment_status | debt_to_income_ratio | credit_score | loan_amount | interest_rate | loan_term | installment | num_of_open_accounts | total_credit_limit | current_balance | delinquency_history | public_records | num_of_delinquencies | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Master's | 2020.02 | Employed | 0.07 | 743 | 17173.72 | 13.39 | 36 | 581.88 | 7 | 40833.47 | 24302.07 | 1 | 0 | 1 |
| 1 | 1 | Bachelor's | 1681.08 | Employed | 0.22 | 531 | 22663.89 | 17.81 | 60 | 573.17 | 5 | 27968.01 | 10803.01 | 1 | 0 | 3 |
| 2 | 1 | High School | 2181.82 | Employed | 0.23 | 779 | 3631.36 | 9.53 | 60 | 76.32 | 2 | 15502.25 | 4505.44 | 0 | 0 | 0 |
| 3 | 1 | High School | 989.49 | Employed | 0.26 | 809 | 14939.23 | 7.99 | 36 | 468.07 | 7 | 18157.79 | 5525.63 | 4 | 0 | 5 |
| 4 | 1 | Other | 2110.54 | Employed | 0.26 | 663 | 16551.71 | 15.20 | 60 | 395.50 | 1 | 17467.56 | 3593.91 | 2 | 0 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 19995 | 0 | Bachelor's | 3303.34 | Employed | 0.28 | 691 | 16322.23 | 15.05 | 36 | 566.22 | 2 | 23748.10 | 5801.45 | 1 | 0 | 4 |
| 19996 | 1 | Bachelor's | 2671.91 | Employed | 0.37 | 758 | 16697.34 | 11.89 | 36 | 553.71 | 8 | 49929.65 | 40901.31 | 3 | 0 | 3 |
| 19997 | 1 | Master's | 1553.50 | Student | 0.11 | 751 | 23924.78 | 10.06 | 36 | 772.66 | 3 | 13137.57 | 5075.67 | 1 | 0 | 2 |
| 19998 | 1 | Master's | 1848.45 | Retired | 0.28 | 646 | 16920.13 | 16.06 | 36 | 595.36 | 5 | 19580.82 | 3876.16 | 4 | 0 | 5 |
| 19999 | 0 | Other | 1978.14 | Employed | 0.23 | 630 | 15769.75 | 13.07 | 36 | 531.88 | 8 | 43013.59 | 12753.03 | 2 | 0 | 2 |
20000 rows × 16 columns
In [11]:
plt.figure(figsize=(10,6))
sns.countplot(df, x="loan_paid_back",color = 'orange')
plt.title("Count Plot of Loan Paid Back")
plt.xlabel("Loan Paid Back")
plt.show()
In [12]:
plt.figure(figsize=(10,6))
sns.boxplot(df, x="monthly_income")
plt.title("Boxplot of Monthly Income")
plt.xlabel("Monthly Income")
plt.show()
In [13]:
seventyfive_percentile_mothlyincome=df["monthly_income"].quantile(0.75)
In [14]:
tewntyfive_percentile_mothlyincome=df["monthly_income"].quantile(0.25)
In [15]:
IQR=seventyfive_percentile_mothlyincome-tewntyfive_percentile_mothlyincome
print(IQR)
2534.765
In [16]:
upper_outlier = seventyfive_percentile_mothlyincome + (1.5*IQR)
lower_outlier = tewntyfive_percentile_mothlyincome - (1.5*IQR)
print(upper_outlier, lower_outlier)
8358.6425 -1780.4175
In [17]:
no_outliers= df[(df["monthly_income"]>=lower_outlier) & (df["monthly_income"]<=upper_outlier)].copy()
print(no_outliers)
loan_paid_back education_level monthly_income employment_status \
0 1 Master's 2020.02 Employed
1 1 Bachelor's 1681.08 Employed
2 1 High School 2181.82 Employed
3 1 High School 989.49 Employed
4 1 Other 2110.54 Employed
... ... ... ... ...
19995 0 Bachelor's 3303.34 Employed
19996 1 Bachelor's 2671.91 Employed
19997 1 Master's 1553.50 Student
19998 1 Master's 1848.45 Retired
19999 0 Other 1978.14 Employed
debt_to_income_ratio credit_score loan_amount interest_rate \
0 0.07 743 17173.72 13.39
1 0.22 531 22663.89 17.81
2 0.23 779 3631.36 9.53
3 0.26 809 14939.23 7.99
4 0.26 663 16551.71 15.20
... ... ... ... ...
19995 0.28 691 16322.23 15.05
19996 0.37 758 16697.34 11.89
19997 0.11 751 23924.78 10.06
19998 0.28 646 16920.13 16.06
19999 0.23 630 15769.75 13.07
loan_term installment num_of_open_accounts total_credit_limit \
0 36 581.88 7 40833.47
1 60 573.17 5 27968.01
2 60 76.32 2 15502.25
3 36 468.07 7 18157.79
4 60 395.50 1 17467.56
... ... ... ... ...
19995 36 566.22 2 23748.10
19996 36 553.71 8 49929.65
19997 36 772.66 3 13137.57
19998 36 595.36 5 19580.82
19999 36 531.88 8 43013.59
current_balance delinquency_history public_records \
0 24302.07 1 0
1 10803.01 1 0
2 4505.44 0 0
3 5525.63 4 0
4 3593.91 2 0
... ... ... ...
19995 5801.45 1 0
19996 40901.31 3 0
19997 5075.67 1 0
19998 3876.16 4 0
19999 12753.03 2 0
num_of_delinquencies
0 1
1 3
2 0
3 5
4 2
... ...
19995 4
19996 3
19997 2
19998 5
19999 2
[19076 rows x 16 columns]
In [18]:
print(no_outliers["monthly_income"].describe())
count 19076.000000 mean 3275.431041 std 1679.810750 min 500.000000 25% 1981.797500 50% 2940.785000 75% 4288.657500 max 8357.400000 Name: monthly_income, dtype: float64
In [19]:
plt.figure(figsize=(10,6))
sns.histplot(df, x="monthly_income",bins=50 ,color = 'green', kde=True)
plt.title("Histogram of Monthly Income with outliers")
plt.xlabel("Monthly Income")
plt.ylabel("Count")
Out[19]:
Text(0, 0.5, 'Count')
In [20]:
plt.figure(figsize=(10,6))
sns.histplot(no_outliers, x="monthly_income",bins=50 ,color = 'green', kde=True)
plt.title("Histogram of Monthly Income without outliers")
plt.xlabel("Monthly Income")
plt.ylabel("Count")
Out[20]:
Text(0, 0.5, 'Count')
In [21]:
plt.figure(figsize=(10,6))
sns.histplot(no_outliers, x="loan_amount",bins=50 ,color = 'red', kde=True)
plt.title("Histogram of Loan amount")
plt.xlabel("Loan Amount")
plt.ylabel("Count")
Out[21]:
Text(0, 0.5, 'Count')
In [22]:
df["loan_amount"].describe()
Out[22]:
count 20000.000000 mean 15129.300909 std 8605.405513 min 500.000000 25% 8852.695000 50% 14946.170000 75% 20998.867500 max 49039.690000 Name: loan_amount, dtype: float64
In [23]:
plt.figure(figsize=(10,6))
sns.histplot(df, x="credit_score",bins=50 ,color = 'blue', kde=True, line_kws={'color': 'black'})
plt.title("Histogram of Credit Score")
plt.xlabel("Credit Score")
plt.ylabel("Density")
Out[23]:
Text(0, 0.5, 'Density')
In [24]:
skewness = no_outliers.skew(numeric_only=True)
print(skewness)
loan_paid_back -1.503481 monthly_income 0.780185 debt_to_income_ratio 0.789635 credit_score -0.073550 loan_amount 0.255350 interest_rate 0.026442 loan_term 0.872613 installment 0.469447 num_of_open_accounts 0.442304 total_credit_limit 1.163923 current_balance 1.569846 delinquency_history 0.824823 public_records 5.018849 num_of_delinquencies 0.720254 dtype: float64
In [25]:
no_outliers["income_to_loan_ratio"]=((no_outliers["monthly_income"]*12)/(no_outliers["loan_amount"]))
no_outliers["utilization_ratio"]=((no_outliers["current_balance"]*12)/(no_outliers["total_credit_limit"]))
In [26]:
no_outliers['education_encoded'] = LabelEncoder().fit_transform(no_outliers['education_level'])
In [27]:
no_outliers['employment_encoded'] = LabelEncoder().fit_transform(no_outliers['employment_status'])
In [28]:
no_outliers.head()
Out[28]:
| loan_paid_back | education_level | monthly_income | employment_status | debt_to_income_ratio | credit_score | loan_amount | interest_rate | loan_term | installment | num_of_open_accounts | total_credit_limit | current_balance | delinquency_history | public_records | num_of_delinquencies | income_to_loan_ratio | utilization_ratio | education_encoded | employment_encoded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Master's | 2020.02 | Employed | 0.07 | 743 | 17173.72 | 13.39 | 36 | 581.88 | 7 | 40833.47 | 24302.07 | 1 | 0 | 1 | 1.411473 | 7.141809 | 2 | 0 |
| 1 | 1 | Bachelor's | 1681.08 | Employed | 0.22 | 531 | 22663.89 | 17.81 | 60 | 573.17 | 5 | 27968.01 | 10803.01 | 1 | 0 | 3 | 0.890093 | 4.635157 | 0 | 0 |
| 2 | 1 | High School | 2181.82 | Employed | 0.23 | 779 | 3631.36 | 9.53 | 60 | 76.32 | 2 | 15502.25 | 4505.44 | 0 | 0 | 0 | 7.209927 | 3.487576 | 1 | 0 |
| 3 | 1 | High School | 989.49 | Employed | 0.26 | 809 | 14939.23 | 7.99 | 36 | 468.07 | 7 | 18157.79 | 5525.63 | 4 | 0 | 5 | 0.794812 | 3.651742 | 1 | 0 |
| 4 | 1 | Other | 2110.54 | Employed | 0.26 | 663 | 16551.71 | 15.20 | 60 | 395.50 | 1 | 17467.56 | 3593.91 | 2 | 0 | 2 | 1.530143 | 2.468972 | 3 | 0 |
no_outliers.to_csv("tranform_dataset.csv")
In [29]:
no_outliers.corr(numeric_only=True)
Out[29]:
| loan_paid_back | monthly_income | debt_to_income_ratio | credit_score | loan_amount | interest_rate | loan_term | installment | num_of_open_accounts | total_credit_limit | current_balance | delinquency_history | public_records | num_of_delinquencies | income_to_loan_ratio | utilization_ratio | education_encoded | employment_encoded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| loan_paid_back | 1.000000 | 0.019801 | -0.222571 | 0.202053 | -0.003618 | -0.113965 | -0.003623 | -0.010736 | 0.003875 | 0.010068 | 0.003278 | -0.083413 | 0.005399 | -0.070747 | 0.005966 | -0.005898 | 0.014446 | -0.489946 |
| monthly_income | 0.019801 | 1.000000 | -0.006746 | 0.005617 | -0.003758 | -0.005073 | -0.006311 | -0.003457 | 0.002780 | 0.842452 | 0.570038 | 0.003121 | -0.001050 | 0.001463 | 0.212248 | 0.003826 | -0.000182 | -0.000054 |
| debt_to_income_ratio | -0.222571 | -0.006746 | 1.000000 | -0.021741 | -0.004013 | 0.014475 | 0.005474 | -0.002954 | -0.005415 | -0.001166 | -0.001496 | 0.227326 | -0.001707 | 0.209722 | -0.005432 | -0.002116 | -0.005339 | 0.011981 |
| credit_score | 0.202053 | 0.005617 | -0.021741 | 1.000000 | 0.007603 | -0.569435 | 0.004303 | -0.030968 | -0.003292 | -0.002704 | -0.008101 | -0.162587 | 0.003588 | -0.142789 | -0.001167 | -0.011766 | -0.001462 | 0.002165 |
| loan_amount | -0.003618 | -0.003758 | -0.004013 | 0.007603 | 1.000000 | -0.006092 | 0.001611 | 0.945011 | -0.005376 | -0.001485 | -0.008343 | -0.006647 | 0.002117 | -0.008046 | -0.477663 | -0.004393 | -0.003510 | 0.010896 |
| interest_rate | -0.113965 | -0.005073 | 0.014475 | -0.569435 | -0.006092 | 1.000000 | -0.004115 | 0.060932 | 0.001876 | 0.000677 | 0.005138 | 0.091634 | -0.003741 | 0.078712 | 0.010002 | 0.009797 | 0.001437 | 0.001008 |
| loan_term | -0.003623 | -0.006311 | 0.005474 | 0.004303 | 0.001611 | -0.004115 | 1.000000 | -0.274990 | 0.015386 | -0.002951 | -0.000285 | -0.002394 | -0.003170 | -0.002597 | -0.002119 | 0.001168 | 0.008283 | 0.002489 |
| installment | -0.010736 | -0.003457 | -0.002954 | -0.030968 | 0.945011 | 0.060932 | -0.274990 | 1.000000 | -0.009226 | -0.000905 | -0.007391 | 0.001220 | 0.004729 | -0.001148 | -0.450918 | -0.004639 | -0.007221 | 0.009685 |
| num_of_open_accounts | 0.003875 | 0.002780 | -0.005415 | -0.003292 | -0.005376 | 0.001876 | 0.015386 | -0.009226 | 1.000000 | 0.102172 | 0.066680 | -0.000318 | -0.008897 | -0.003268 | -0.008094 | 0.000687 | 0.010244 | 0.000554 |
| total_credit_limit | 0.010068 | 0.842452 | -0.001166 | -0.002704 | -0.001485 | 0.000677 | -0.002951 | -0.000905 | 0.102172 | 1.000000 | 0.673074 | 0.002854 | 0.002314 | 0.003069 | 0.177502 | -0.002482 | -0.006075 | -0.000430 |
| current_balance | 0.003278 | 0.570038 | -0.001496 | -0.008101 | -0.008343 | 0.005138 | -0.000285 | -0.007391 | 0.066680 | 0.673074 | 1.000000 | 0.004341 | -0.002354 | 0.008308 | 0.129031 | 0.648715 | -0.002374 | 0.003713 |
| delinquency_history | -0.083413 | 0.003121 | 0.227326 | -0.162587 | -0.006647 | 0.091634 | -0.002394 | 0.001220 | -0.000318 | 0.002854 | 0.004341 | 1.000000 | 0.001683 | 0.903351 | -0.002352 | 0.004545 | 0.003336 | 0.000028 |
| public_records | 0.005399 | -0.001050 | -0.001707 | 0.003588 | 0.002117 | -0.003741 | -0.003170 | 0.004729 | -0.008897 | 0.002314 | -0.002354 | 0.001683 | 1.000000 | 0.005856 | -0.006165 | -0.010095 | 0.008239 | -0.008134 |
| num_of_delinquencies | -0.070747 | 0.001463 | 0.209722 | -0.142789 | -0.008046 | 0.078712 | -0.002597 | -0.001148 | -0.003268 | 0.003069 | 0.008308 | 0.903351 | 0.005856 | 1.000000 | -0.008850 | 0.005942 | -0.000615 | -0.004960 |
| income_to_loan_ratio | 0.005966 | 0.212248 | -0.005432 | -0.001167 | -0.477663 | 0.010002 | -0.002119 | -0.450918 | -0.008094 | 0.177502 | 0.129031 | -0.002352 | -0.006165 | -0.008850 | 1.000000 | 0.003482 | -0.005918 | -0.005893 |
| utilization_ratio | -0.005898 | 0.003826 | -0.002116 | -0.011766 | -0.004393 | 0.009797 | 0.001168 | -0.004639 | 0.000687 | -0.002482 | 0.648715 | 0.004545 | -0.010095 | 0.005942 | 0.003482 | 1.000000 | -0.000294 | 0.005680 |
| education_encoded | 0.014446 | -0.000182 | -0.005339 | -0.001462 | -0.003510 | 0.001437 | 0.008283 | -0.007221 | 0.010244 | -0.006075 | -0.002374 | 0.003336 | 0.008239 | -0.000615 | -0.005918 | -0.000294 | 1.000000 | -0.009679 |
| employment_encoded | -0.489946 | -0.000054 | 0.011981 | 0.002165 | 0.010896 | 0.001008 | 0.002489 | 0.009685 | 0.000554 | -0.000430 | 0.003713 | 0.000028 | -0.008134 | -0.004960 | -0.005893 | 0.005680 | -0.009679 | 1.000000 |
In [30]:
plt.figure(figsize=(20,8))
sns.heatmap(no_outliers.corr(numeric_only=True), annot=True)
plt.title("Correlation between the numerical variables")
plt.show()
In [31]:
sns.pairplot(data=no_outliers)
plt.show()
In [32]:
X=no_outliers[["monthly_income",
"debt_to_income_ratio",
"credit_score",
"loan_amount",
"interest_rate",
"loan_term",
"installment",
"num_of_open_accounts",
"total_credit_limit",
"current_balance",
"delinquency_history",
"public_records",
"num_of_delinquencies",
"income_to_loan_ratio",
"utilization_ratio",
"education_encoded",
"employment_encoded"]]
y=no_outliers["loan_paid_back"]
In [33]:
X_train, X_test, y_train, y_test = train_test_split(
X, y,
test_size=0.25,
random_state=15
)
In [34]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
In [35]:
lr=LogisticRegression()
lr.fit(X_train_scaled, y_train)
Out[35]:
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()
In [36]:
y_pred = lr.predict(X_test_scaled)
y_prob = lr.predict_proba(X_test_scaled)[:, 1]
In [37]:
In [81]:
X = sm.add_constant(X)
model = sm.Logit(y, X)
result = model.fit()
print(result.summary())
Optimization terminated successfully.
Current function value: 0.334290
Iterations 7
Logit Regression Results
==============================================================================
Dep. Variable: loan_paid_back No. Observations: 19076
Model: Logit Df Residuals: 19058
Method: MLE Df Model: 17
Date: Tue, 13 Jan 2026 Pseudo R-squ.: 0.3312
Time: 21:53:51 Log-Likelihood: -6376.9
converged: True LL-Null: -9534.3
Covariance Type: nonrobust LLR p-value: 0.000
========================================================================================
coef std err z P>|z| [0.025 0.975]
----------------------------------------------------------------------------------------
const -3.6606 0.417 -8.786 0.000 -4.477 -2.844
monthly_income 7.139e-05 2.61e-05 2.737 0.006 2.03e-05 0.000
debt_to_income_ratio -7.0442 0.220 -32.006 0.000 -7.476 -6.613
credit_score 0.0112 0.000 26.702 0.000 0.010 0.012
loan_amount 7.049e-06 1.57e-05 0.448 0.654 -2.38e-05 3.79e-05
interest_rate 0.0124 0.012 1.050 0.294 -0.011 0.036
loan_term -0.0028 0.004 -0.685 0.493 -0.011 0.005
installment -0.0002 0.001 -0.484 0.629 -0.001 0.001
num_of_open_accounts 0.0042 0.010 0.415 0.678 -0.016 0.024
total_credit_limit -2.945e-06 2.61e-06 -1.128 0.259 -8.06e-06 2.17e-06
current_balance 5.251e-07 3.71e-06 0.142 0.887 -6.74e-06 7.79e-06
delinquency_history -0.0423 0.036 -1.163 0.245 -0.114 0.029
public_records 0.0089 0.081 0.109 0.913 -0.150 0.168
num_of_delinquencies 0.0361 0.033 1.091 0.275 -0.029 0.101
income_to_loan_ratio -0.0005 0.001 -0.357 0.721 -0.003 0.002
utilization_ratio -0.0053 0.015 -0.348 0.728 -0.036 0.025
education_encoded 0.0362 0.020 1.778 0.075 -0.004 0.076
employment_encoded -0.9549 0.016 -58.631 0.000 -0.987 -0.923
========================================================================================
In [38]:
print("\nROC-AUC Score:")
print(roc_auc_score(y_test, y_prob))
ROC-AUC Score: 0.8551456344480971
In [ ]: